From 1f54877c29c130681dedee407491e3ac63163437 Mon Sep 17 00:00:00 2001
From: ThomasV
Date: Wed, 7 Jul 2010 11:05:00 +0000
Subject: [PATCH] fix text layer extraction (bug 21526); patch by Simon Lipp

---
 includes/DjVuImage.php | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/includes/DjVuImage.php b/includes/DjVuImage.php
index 75df0fd59f..f14d8d64cb 100644
--- a/includes/DjVuImage.php
+++ b/includes/DjVuImage.php
@@ -259,8 +259,21 @@ class DjVuImage {
 					$txt = UtfNormal::cleanUp( $txt );
 				}
 				$txt = preg_replace( "/[\013\035\037]/", "", $txt );
-				$txt = htmlspecialchars($txt);
-				$txt = preg_replace( "/\((page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*\"([^<]*?)\"\s*|)\)/s", "<PAGE value=\"$2\" />", $txt );
+				$reg = <<<EOR
+					/\(page\s[\d-]*\s[\d-]*\s[\d-]*\s[\d-]*\s*"
+					((?>    # Text to match is composed of atoms of either:
+					  \\\\. # - any escaped character
+					  |     # - any character different from " and \
+					  [^"\\\\]+
+					)*?)
+					"\s*\)
+					| # Or page can be empty ; in this case, djvutxt dumps ()
+					\(\s*()\)/sx
+EOR;
+				$txt = preg_replace_callback( $reg,
+							      create_function('$matches', 'return \'<PAGE value="\'.htmlspecialchars($matches[1]).\'" />\';'),
+							      $txt );
+
 				$txt = "<DjVuTxt>\n<HEAD></HEAD>\n<BODY>\n" . $txt . "</BODY>\n</DjVuTxt>\n";
 				$xml = preg_replace( "/<DjVuXML>/", "<mw-djvu><DjVuXML>", $xml );
 				$xml = $xml . $txt. '</mw-djvu>' ;
-- 
2.20.1